Database configuration
# Connection settings for the SQL database.
# Each value can be overridden through an environment variable so that real
# credentials do not have to be committed with this script; when the variable
# is unset, the original placeholder value is used.
sqlHost <- Sys.getenv("NHANES_SQL_HOST", unset = "localhost")
sqlUserName <- Sys.getenv("NHANES_SQL_USER", unset = "sa")
sqlPassword <- Sys.getenv("NHANES_SQL_PASSWORD", unset = "yourStrong(!)Password")
sqlDefaultDb <- Sys.getenv("NHANES_SQL_DB", unset = "NhanesLandingZone")
Load libraries
library(naniar)
library(visdat)
library(corrplot)
library(phonto)
Load data
# Fetch the NHANES table "DEMO_E" through phonto, then render it as an
# interactive table.
demo <- phonto::nhanes("DEMO_E")
DT::datatable(data = demo)
## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html
Missingness of each variable across the levels of another variable (e.g., age)
# Plot the missingness of every variable, split by the values of RIDAGEYR.
naniar::gg_miss_fct(x = demo, fct = RIDAGEYR)

Show the percentage of missing values per variable
# Plot missingness per variable, expressed as a percentage (show_pct = TRUE).
naniar::gg_miss_var(x = demo, show_pct = TRUE)
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.

Plot missing pattern
# Visualise which cells of `demo` are missing; the large-data warning is
# switched off explicitly.
visdat::vis_miss(x = demo, warn_large_data = FALSE)

Correlation of missingness between variables
# Count the NA values in each column of `demo`.
# vapply() pins the result to exactly one integer per column, so this is
# always a named integer vector; sapply() would return a list when `demo`
# has no columns.
missing_cols <- vapply(demo, function(x) sum(is.na(x)), integer(1))
# Keep only the columns that contain at least one missing value.
missing_cols <- missing_cols[missing_cols > 0]
missing_cols
## RIDEXMON RIDAGEMN RIDAGEEX DMQMILIT DMDCITZN DMDYRSUS DMDEDUC3 DMDEDUC2
## 387 439 782 3778 6 8439 7649 4214
## DMDSCHOL DMDMARTL INDHHIN2 INDFMIN2 INDFMPIR RIDEXPRG DMDHRBR2 DMDHREDU
## 7879 4214 77 94 894 8941 268 266
## DMDHRMAR DMDHSEDU SIALANG SIAPROXY SIAINTRP FIALANG FIAPROXY FIAINTRP
## 341 4738 1 1 1 128 128 128
## MIALANG MIAPROXY MIAINTRP AIALANG
## 3003 3003 3003 2415
# Correlate the missingness indicators of the columns that have missing
# values: a high correlation means two variables tend to be missing in the
# same rows.
# drop = FALSE keeps the selection two-dimensional even when only one column
# has missing values; otherwise it collapses to a vector and cor() fails.
na_indicator <- is.na(demo[, names(missing_cols), drop = FALSE])
# A column that is missing in every row has a constant indicator (zero
# variance), which produces NA correlations that the hclust ordering below
# cannot process, so such columns are left out.
is_constant <- colSums(na_indicator) == nrow(na_indicator)
na_indicator <- na_indicator[, !is_constant, drop = FALSE]
correlation <- cor(na_indicator)
corrplot(
  correlation,
  type = "upper", order = "hclust",
  tl.col = "black", tl.srt = 45
)
